# Load required libraries
library(ggplot2)
library(plotly)
library(gapminder)
library(plyr)
library(stringr)
library(dplyr)
library(lubridate)
library(magrittr)
library(forcats)
library(data.table)
# Read the launch records from the local CSV export and preview the first rows.
Space_missions <- read.csv(file = "C:/---/Space_missions/Space_Corrected.csv")
head(Space_missions, n = 6L)
## X Unnamed..0 Company.Name
## 1 0 0 SpaceX
## 2 1 1 CASC
## 3 2 2 SpaceX
## 4 3 3 Roscosmos
## 5 4 4 ULA
## 6 5 5 CASC
## Location
## 1 LC-39A, Kennedy Space Center, Florida, USA
## 2 Site 9401 (SLS-2), Jiuquan Satellite Launch Center, China
## 3 Pad A, Boca Chica, Texas, USA
## 4 Site 200/39, Baikonur Cosmodrome, Kazakhstan
## 5 SLC-41, Cape Canaveral AFS, Florida, USA
## 6 LC-9, Taiyuan Satellite Launch Center, China
## Datum
## 1 Fri Aug 07, 2020 05:12 UTC
## 2 Thu Aug 06, 2020 04:01 UTC
## 3 Tue Aug 04, 2020 23:57 UTC
## 4 Thu Jul 30, 2020 21:25 UTC
## 5 Thu Jul 30, 2020 11:50 UTC
## 6 Sat Jul 25, 2020 03:13 UTC
## Detail Status.Rocket Rocket
## 1 Falcon 9 Block 5 | Starlink V1 L9 & BlackSky StatusActive 50.0
## 2 Long March 2D | Gaofen-9 04 & Q-SAT StatusActive 29.75
## 3 Starship Prototype | 150 Meter Hop StatusActive
## 4 Proton-M/Briz-M | Ekspress-80 & Ekspress-103 StatusActive 65.0
## 5 Atlas V 541 | Perseverance StatusActive 145.0
## 6 Long March 4B | Ziyuan-3 03, Apocalypse-10 & NJU-HKU 1 StatusActive 64.68
## Status.Mission
## 1 Success
## 2 Success
## 3 Success
## 4 Success
## 5 Success
## 6 Success
# Show column names, types and sample values of the raw data frame.
str(object = Space_missions)
## 'data.frame': 4324 obs. of 9 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Unnamed..0 : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Company.Name : chr "SpaceX" "CASC" "SpaceX" "Roscosmos" ...
## $ Location : chr "LC-39A, Kennedy Space Center, Florida, USA" "Site 9401 (SLS-2), Jiuquan Satellite Launch Center, China" "Pad A, Boca Chica, Texas, USA" "Site 200/39, Baikonur Cosmodrome, Kazakhstan" ...
## $ Datum : chr "Fri Aug 07, 2020 05:12 UTC" "Thu Aug 06, 2020 04:01 UTC" "Tue Aug 04, 2020 23:57 UTC" "Thu Jul 30, 2020 21:25 UTC" ...
## $ Detail : chr "Falcon 9 Block 5 | Starlink V1 L9 & BlackSky" "Long March 2D | Gaofen-9 04 & Q-SAT" "Starship Prototype | 150 Meter Hop" "Proton-M/Briz-M | Ekspress-80 & Ekspress-103" ...
## $ Status.Rocket : chr "StatusActive" "StatusActive" "StatusActive" "StatusActive" ...
## $ Rocket : chr "50.0 " "29.75 " "" "65.0 " ...
## $ Status.Mission: chr "Success" "Success" "Success" "Success" ...
# Open the raw table in the data viewer before cleaning it.
View(Space_missions)

# Tidy the columns in a single pass:
#   * drop the redundant index column `X`
#   * parse `Datum` (with or without a time of day) into a Date, `launch_date`
#   * give the remaining index column a descriptive name
Space_missions <- Space_missions %>%
  select(-X) %>%
  mutate(
    launch_date = as_date(
      parse_date_time(Datum, orders = c("mdy HM", "mdy"), tz = "UTC")
    )
  ) %>%
  rename(row_names = Unnamed..0)

summary(Space_missions)
## row_names Company.Name Location Datum
## Min. : 0 Length:4324 Length:4324 Length:4324
## 1st Qu.:1081 Class :character Class :character Class :character
## Median :2162 Mode :character Mode :character Mode :character
## Mean :2162
## 3rd Qu.:3242
## Max. :4323
## Detail Status.Rocket Rocket Status.Mission
## Length:4324 Length:4324 Length:4324 Length:4324
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## launch_date
## Min. :1957-10-04
## 1st Qu.:1972-04-19
## Median :1984-12-16
## Mean :1987-11-28
## 3rd Qu.:2002-09-10
## Max. :2020-08-07
# Count missing values per column. vapply() pins the result to exactly one
# integer per column, so the return type cannot silently change with the
# shape of the input the way sapply()'s can.
vapply(Space_missions, function(x) sum(is.na(x)), integer(1))
## row_names Company.Name Location Datum Detail
## 0 0 0 0 0
## Status.Rocket Rocket Status.Mission launch_date
## 0 0 0 0
# First pass at a country column: take the last space-separated word of the
# launch site description, then tabulate the values to spot suffixes that are
# not country names.
Space_missions <- mutate(Space_missions, country = word(Location, start = -1))
count(Space_missions, country, sort = TRUE)
## country n
## 1 Russia 1395
## 2 USA 1344
## 3 Kazakhstan 701
## 4 France 303
## 5 China 268
## 6 Japan 126
## 7 India 76
## 8 Ocean 36
## 9 Iran 13
## 10 Zealand 13
## 11 Israel 11
## 12 Kenya 9
## 13 Korea 8
## 14 Australia 6
## 15 Mexico 4
## 16 Sea 4
## 17 Brazil 3
## 18 Canaria 2
## 19 Facility 1
## 20 Site 1
# Pull out the launch sites whose last word is a generic term rather than a
# country, so the full Location strings can be inspected by hand.
loc <- Space_missions %>%
  filter(country %in% c("Ocean", "Sea", "Facility", "Site")) %>%
  select(country, Location)
View(loc)
# Assign the final country label for every launch in one step.
#
# Sites whose description does not end in a country name are mapped directly
# to their final label. The previous two-stage version renamed "Russia" to
# "Russian Federation" *before* mapping the Barents Sea launches to "Russia",
# which left those launches in a separate "Russia" category; they are now
# labelled "Russian Federation" like every other Russian launch.
Space_missions <- Space_missions %>%
  mutate(
    country = case_when(
      Location == "LP Odyssey, Kiritimati Launch Area, Pacific Ocean" ~
        "Pacific Ocean",
      Location == "LP-41, Kauai, Pacific Missile Range Facility" ~ "USA",
      Location %in% c(
        "K-84 Submarine, Barents Sea Launch Area, Barents Sea",
        "K-496 Submarine, Barents Sea Launch Area, Barents Sea",
        "K-407 Submarine, Barents Sea Launch Area, Barents Sea"
      ) ~ "Russian Federation",
      Location == "Tai Rui Barge, Yellow Sea" ~ "China",
      Location == "Launch Plateform, Shahrud Missile Test Site" ~ "Iran",
      # Matched by suffix rather than by the full site string: the site name
      # contains an accented character whose bytes depend on how the CSV was
      # decoded, so an exact comparison is fragile.
      str_detect(Location, "New Zealand$") ~ "New Zealand",
      # NOTE(review): launches ending in "Canaria" are attributed to the USA,
      # as in the original mapping -- confirm this is the intended attribution.
      word(Location, -1) == "Canaria" ~ "USA",
      word(Location, -1) == "Russia" ~ "Russian Federation",
      # Everything else: the last word of the location is the country.
      TRUE ~ word(Location, -1)
    )
  )

# Re-tabulate to confirm every label is now a country (or "Pacific Ocean").
Space_missions %>% count(country, sort = TRUE)
## country n
## 1 Russian Federation 1395
## 2 USA 1347
## 3 Kazakhstan 701
## 4 France 303
## 5 China 269
## 6 Japan 126
## 7 India 76
## 8 Pacific Ocean 36
## 9 Iran 14
## 10 New Zealand 13
## 11 Israel 11
## 12 Kenya 9
## 13 Korea 8
## 14 Australia 6
## 15 Mexico 4
## 16 Brazil 3
## 17 Russia 3
# Column chart of the total number of launches per country, with countries
# ordered from most to fewest launches. count() yields one row per country,
# so no second aggregation is needed.
Space_missions %>%
  count(country, name = "count_total") %>%
  ggplot(aes(
    x = fct_reorder(country, -count_total), # most launches first
    y = count_total
  )) +
  geom_col() +
  labs(
    title = "Launches by Country",
    subtitle = "Column plot, number of launches per country",
    caption = "Kaggle: All Space Missions from 1957",
    x = "Country name",
    y = "Number of launches"
  )
# Stacked column chart of launches per country, split by mission outcome.
# There are several rows per country here (one per outcome), so the x-axis is
# ordered by the *sum* of the counts; fct_reorder()'s default summary is the
# median, which would not order the bars by their total height.
Space_missions %>%
  count(country, Status.Mission, name = "count") %>%
  ggplot(aes(
    x = fct_reorder(country, count, .fun = sum, .desc = TRUE),
    y = count,
    fill = Status.Mission
  )) +
  geom_col() +
  labs(
    title = "Launches by Country and Mission Status",
    subtitle = "Stacked column plot, number of launches per country",
    caption = "Kaggle: All Space Missions from 1957",
    x = "Country name",
    y = "Number of launches"
  )
# Number of launches for every (country, company) pair, largest first.
Space_missions %>%
  count(country, Company.Name) %>%
  arrange(desc(n))
## country Company.Name n
## 1 Russian Federation RVSN USSR 1198
## 2 Kazakhstan RVSN USSR 579
## 3 France Arianespace 277
## 4 China CASC 251
## 5 USA General Dynamics 251
## 6 USA NASA 203
## 7 USA US Air Force 161
## 8 Russian Federation VKS RF 157
## 9 USA ULA 140
## 10 USA Boeing 136
## 11 USA Martin Marietta 114
## 12 USA SpaceX 100
## 13 Japan MHI 84
## 14 USA Northrop 83
## 15 USA Lockheed 79
## 16 India ISRO 76
## 17 Kazakhstan Roscosmos 47
## 18 Kazakhstan VKS RF 44
## 19 USA ILS 40
## 20 Pacific Ocean Sea Launch 36
## 21 Japan ISAS 30
## 22 USA US Navy 17
## 23 France ESA 13
## 24 Iran ISA 13
## 25 New Zealand Rocket Lab 13
## 26 Russian Federation Eurockot 13
## 27 Kazakhstan Kosmotras 12
## 28 USA Blue Origin 12
## 29 Israel IAI 11
## 30 China ExPace 10
## 31 Russian Federation Kosmotras 10
## 32 Kenya ASI 9
## 33 France CNES 8
## 34 Russian Federation Roscosmos 8
## 35 Japan JAXA 7
## 36 Kazakhstan Land Launch 7
## 37 Russian Federation MITT 7
## 38 USA AMBA 7
## 39 Kazakhstan ILS 6
## 40 China CASIC 5
## 41 Japan UT 5
## 42 Korea KCST 5
## 43 France Arm??e de l'Air 4
## 44 Mexico Exos 4
## 45 Australia CECLES 3
## 46 Brazil AEB 3
## 47 Korea KARI 3
## 48 Russia SRC 3
## 49 Australia RAE 2
## 50 Kazakhstan Arianespace 2
## 51 Kazakhstan OKB-586 2
## 52 Australia AMBA 1
## 53 China i-Space 1
## 54 China Landspace 1
## 55 China OneSpace 1
## 56 France CECLES 1
## 57 Iran IRGC 1
## 58 Kazakhstan Starsem 1
## 59 Kazakhstan Yuzhmash 1
## 60 Russian Federation Khrunichev 1
## 61 Russian Federation Yuzhmash 1
## 62 USA Douglas 1
## 63 USA EER 1
## 64 USA Sandia 1
## 65 USA Virgin Orbit 1
# Launches per company for sites labelled "Russian Federation", largest first.
Space_missions %>%
  filter(country == "Russian Federation") %>%
  count(Company.Name, sort = TRUE)
## Company.Name n
## 1 RVSN USSR 1198
## 2 VKS RF 157
## 3 Eurockot 13
## 4 Kosmotras 10
## 5 Roscosmos 8
## 6 MITT 7
## 7 Khrunichev 1
## 8 Yuzhmash 1
# Launches per company for sites labelled "USA", largest first.
Space_missions %>%
  filter(country == "USA") %>%
  count(Company.Name, sort = TRUE)
## Company.Name n
## 1 General Dynamics 251
## 2 NASA 203
## 3 US Air Force 161
## 4 ULA 140
## 5 Boeing 136
## 6 Martin Marietta 114
## 7 SpaceX 100
## 8 Northrop 83
## 9 Lockheed 79
## 10 ILS 40
## 11 US Navy 17
## 12 Blue Origin 12
## 13 AMBA 7
## 14 Douglas 1
## 15 EER 1
## 16 Sandia 1
## 17 Virgin Orbit 1
# Launches per company for sites labelled "China", largest first.
Space_missions %>%
  filter(country == "China") %>%
  count(Company.Name, sort = TRUE)
## Company.Name n
## 1 CASC 251
## 2 ExPace 10
## 3 CASIC 5
## 4 i-Space 1
## 5 Landspace 1
## 6 OneSpace 1
# Launches per company, faceted by country, for the five countries with the
# most launches.
#
# The set of top countries is computed once and applied as a filter with
# semi_join(), instead of aggregating, joining back onto the raw rows and
# aggregating again. slice_max() replaces the superseded top_n() and, like
# it, keeps ties. count() returns ungrouped data, so no grouped-summarise
# messages are emitted.
Space_missions %>%
  semi_join(
    Space_missions %>%
      count(country, name = "total") %>%
      slice_max(total, n = 5),
    by = "country"
  ) %>%
  count(country, Company.Name) %>%
  ggplot(aes(x = reorder(Company.Name, n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.5, size = 3) +
  theme_bw() +
  facet_wrap(~country, ncol = 2, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## `summarise()` has grouped output by 'country'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'country'. You can override using the
## `.groups` argument.
# Launches per company, faceted by country, for USA, Russian Federation and
# China only.
#
# After filtering to three countries, the former "top 5 countries" step and
# the join back onto the full table could not remove anything, so the data
# preparation reduces to a filter followed by a count.
Space_missions %>%
  filter(country %in% c("USA", "Russian Federation", "China")) %>%
  count(country, Company.Name) %>%
  ggplot(aes(x = reorder(Company.Name, n), y = n)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.5, size = 3) +
  theme_bw() +
  facet_wrap(~country, ncol = 2, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Same per-company chart for USA, Russian Federation and China, with companies
# above 150 launches filled red and all others white.
#
# After filtering to three countries, the former "top 5 countries" step and
# the join back onto the full table could not remove anything, so the data
# preparation reduces to a filter followed by a count.
Space_missions %>%
  filter(country %in% c("USA", "Russian Federation", "China")) %>%
  count(country, Company.Name) %>%
  ggplot(aes(x = reorder(Company.Name, n), y = n, fill = n > 150)) +
  geom_col() +
  geom_text(aes(label = n), vjust = -0.5, size = 3) +
  facet_wrap(~country, ncol = 2, scales = "free_x") +
  scale_fill_manual(values = c("FALSE" = "white", "TRUE" = "red")) +
  theme(
    panel.background = element_rect(fill = "gray95", colour = NA),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )